Set knitting options
# global knitting options for automatic saving of all plots as .png and .pdf. Also sets cache directory.
knitr::opts_chunk$set(
dev = c("png", "pdf"), fig.keep = "all",
dev.args = list(pdf = list(encoding = "WinAnsi", useDingbats = FALSE)),
fig.path = file.path("fig_output/", paste0(gsub("\\.[Rr]md", "/", knitr::current_input()))),
cache.path = file.path("cache/", paste0(gsub("\\.[Rr]md", "/", knitr::current_input())))
)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(rlang)
##
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
##
## %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
## flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
## splice
library(glue)
##
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
##
## collapse
library(latex2exp)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# source all relevant scripting files
source(file.path("scripts", "ampliverse_05.R"))
source(file.path("scripts", "plotting_functions.R"))
Load DADA2 output data
# taxa and sequence tables obtained from https://github.com/danote/Samail_16S_compilation
seqtab_OM18 <- read_rds("data_raw/16S_sequencing_data/seqtab_nochim_OM18_processed_20200719.rds")
taxtab_OM18 <- read_rds("data_raw/16S_sequencing_data/taxa_OM18_processed_20200720.rds")
Load metadata
meta_map_OM18 <- read_delim("data_raw/16S_sequencing_data/map_for_compilation_OM18.txt", delim = "\t",
col_types = cols(
sample_id = col_character(),
barcode_sequence = col_character(),
forward_linker_primer_sequence = col_character(),
reverse_primer_sequence = col_character(),
sample_type = col_character(),
nucleic_acid_type = col_character(),
sampling_site = col_character(),
year_sampled = col_double(),
month_sampled = col_double(),
day_sampled = col_double(),
depth_fluid_intake_mbct = col_double(),
notes = col_character(),
sampling_method = col_character(),
upper_packer_inflated = col_logical(),
upper_packer_depth_mbct = col_double(),
lower_packer_inflated = col_logical(),
lower_packer_depth_mbct = col_double(),
well_depth_mbgl = col_double(),
casing_extent_mbct = col_double(),
casing_height_magl = col_double(),
screened_interval_mbct = col_character(),
depth_to_water_mbct = col_double()
)
) %>% select(1:22)
ampli_data_OM18 <- ampli_tidy_dada2(seqtab_OM18, taxtab_OM18) %>% ampli_concat_tax() %>% ampli_join_metadata_map(meta_map_OM18)
ampli_data_OM18_sum <- ampli_data_OM18 %>% ampli_tally_reads(c("year_sampled","sample_type"))
# sort by read counts
ampli_data_OM18_sum %>% arrange(desc(reads_sum))
# generate summary stats of read counts
summary(ampli_data_OM18_sum %>% select(reads_sum))
## reads_sum
## Min. : 63.0
## 1st Qu.: 959.2
## Median :22249.5
## Mean :17501.7
## 3rd Qu.:29552.0
## Max. :45606.0
Plot read counts
Oman groundwater samples have significantly higher read counts than extraction or PCR controls, which is good.
plot_reads_sums_1 <- ampli_data_OM18_sum %>% ggplot(aes(
x = fct_reorder(sample_id, desc(reads_sum)),
y = reads_sum,
fill = sample_type
)) +
geom_bar(stat = "identity") +
scale_y_continuous(name = "Reads") +
scale_x_discrete(name = "Sample ID") +
scale_fill_discrete(name = "Sample type") +
theme_bw()+
theme(
axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1),
legend.position = "bottom"
)
plot_reads_sums_1
# Focus on data of interest ## Filter for only desired samples
# define sample set ID's
samples_to_keep <- c("CM2A_45_9D", "NSHQ14_45_7J", "NSHQ4_S_11D", "WAB103_45_10J", "WAB104_45_5C", "WAB105_45_6D", "WAB188_45_4J", "WAB55_45_8C", "WAB71_45_3D")
# keep just those samples
ampli_data_OM18_focus_samples <- ampli_data_OM18 %>% ampli_filter_strings(col_to_filter = sample_id, strings_to_filter = samples_to_keep, detection_method = "complete", action = "keep") %>%
# remove taxa with zero reads (messes up plotting later if kept)
ampli_rm_0_read_taxa()
## 2119 zero-read taxa removed from data set.
Filter out mitochondria, chloroplasts, eukaryotes, and sequences not assigned taxonomy at the the domain level
ampli_data_OM18_focus_samples_taxa_filtered <- ampli_data_OM18_focus_samples %>% ampli_filter_strings(col_to_filter = taxonomy, strings_to_filter = c("Chloroplast", "Mitochondria", "Eukaryota", "k__NA"), detection_method = "substring", action = "remove")
Tally reads per sample
ampli_data_OM18_focus_samples_taxa_filtered_sum <- ampli_data_OM18_focus_samples_taxa_filtered %>% ampli_tally_reads(c("year_sampled","sample_type"))
# sort by read counts
ampli_data_OM18_focus_samples_taxa_filtered_sum %>% arrange(desc(reads_sum))
# generate summary stats of read counts
summary(ampli_data_OM18_focus_samples_taxa_filtered_sum %>% select(reads_sum))
## reads_sum
## Min. :24474
## 1st Qu.:27107
## Median :32502
## Mean :31460
## 3rd Qu.:33318
## Max. :40720
Plot read counts
plot_reads_sums_2 <- ampli_data_OM18_focus_samples_taxa_filtered_sum %>% ggplot(aes(
x = fct_reorder(sample_id, desc(reads_sum)),
y = reads_sum,
label = reads_sum
)) +
geom_bar(stat = "identity") +
geom_text(nudge_y = 1200) +
scale_x_discrete(name = "Sample ID") +
scale_y_continuous(name = "Reads") +
theme_bw()+
theme(
axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1),
legend.position = "bottom",
panel.grid = element_blank()
)
plot_reads_sums_2
ampli_data_OM18_focus_samples_taxa_filtered <- ampli_data_OM18_focus_samples_taxa_filtered %>% ampli_calc_rel_abund()
ampli_data_OM18_focus_samples_taxa_filtered %>% head()
Check that relative abundances add up to 1, as expected
ampli_data_OM18_focus_samples_taxa_filtered %>% group_by(sample_id) %>% summarise(rel_abund_sum = sum(rel_abund), .groups = "drop") %>% summary()
## sample_id rel_abund_sum
## Length:9 Min. :1
## Class :character 1st Qu.:1
## Mode :character Median :1
## Mean :1
## 3rd Qu.:1
## Max. :1
OM18_heat <- ampli_data_OM18_focus_samples_taxa_filtered %>%
ampli_heat_map(x_sample_group_col = sampling_site, text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_zero = "n.r.", text_label_threshold_round_priority = "round", top_n = 50, y_taxa_arrangement = "abund")
OM18_heat +
# plot geometry
geom_text(parse = FALSE, size = 2.25) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance / [%]", low = "white", high = "red", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = NULL, expand = c(0,0)) +
scale_y_discrete(name = NULL, expand = c(0,0)) +
theme_bw(base_size = 8.1) +
theme(
legend.position = "bottom",
panel.grid = element_blank()
)
CH4_taxa <- c("Methanobacteria", "Methanomicrobia", "Methanopyrales", "Methanocellales", "Methanoplasmatales", "Methanosarcinales", "Methanomassiliicocc", "Methylococc", "Methylocystis", "Methylosinus", "Methylocella", "Methylocapsa", "Methylacidiphil", "Methylomirabilis", "ANME")
ampli_data_OM18_focus_samples_taxa_filtered %>%
ampli_heat_map(x_sample_group_col = sampling_site, taxa_selection_method = "custom_taxa_char_vector", custom_taxa_char_vector = CH4_taxa, plot_other_taxa_bin = FALSE, y_taxa_arrangement = "alpha", text_label_scalar = 100, text_label_decimal_places = 1, text_label_format = "scientific") +
# plot geometry
geom_text(parse = TRUE, size = 2.1) +
# plot styling
scale_fill_gradient(name = "Read relative\nabundance / [%]", low = "white", high = "red", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = NULL, expand = c(0,0)) +
scale_y_discrete(name = NULL, expand = c(0,0)) +
theme_bw(base_size = 6.5) +
theme(
legend.position = "bottom",
panel.grid = element_blank()
)
## Warning: `as_character()` is deprecated as of rlang 0.4.0
## Please use `vctrs::vec_cast()` instead.
## This warning is displayed once per session.
CH4_plotted_taxa_names_tbl <- ampli_data_OM18_focus_samples_taxa_filtered %>%
ampli_heat_map(x_sample_group_col = sampling_site, taxa_selection_method = "custom_taxa_char_vector", custom_taxa_char_vector = CH4_taxa, plot_other_taxa_bin = FALSE, y_taxa_arrangement = "alpha", return_plot_data_tbl = TRUE)
CH4_plotted_taxa_names_tbl
CH4_plotted_taxa_names_tbl_edit <- CH4_plotted_taxa_names_tbl %>% mutate(tax_short = case_when(
taxonomy == "k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanobacterium; s__NA" ~ "g. $\\textit{Methanobacterium}$",
taxonomy == "k__Archaea; p__Thermoplasmatota; c__Thermoplasmata; o__Methanomassiliicoccales; f__NA; g__NA; s__NA" ~ "o. Methanomassiliicoccales",
taxonomy == "k__Archaea; p__Halobacterota; c__ANME-1; o__ANME-1b; f__NA; g__NA; s__NA" ~ "o. ANME-1b",
taxonomy == "k__Bacteria; p__Methylomirabilota; c__Methylomirabilia; o__Methylomirabilales; f__Methylomirabilaceae; g__Candidatus_Methylomirabilis; s__NA" ~ "g. \\textit{Ca.} Methylomirabilis",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Beijerinckiaceae; g__Methylocystis; s__NA" ~ "g. \\textit{Methylocystis}",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Methylococcales; f__Methylococcaceae; g__Methylocaldum; s__NA" ~ "g. \\textit{Methylocaldum}",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Methylococcales; f__Methylococcaceae; g__Methylococcus; s__NA" ~ "g. \\textit{Methylococcus}",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Methylococcales; f__Methylococcaceae; g__Methyloterricola; s__NA" ~ "g. \\textit{Methyloterricola}"
))
CH4_plotted_taxa_names_tbl_edit$tax_short <- factor(CH4_plotted_taxa_names_tbl_edit$tax_short, levels = c(
"g. $\\textit{Methanobacterium}$",
"o. Methanomassiliicoccales",
"o. ANME-1b",
"g. \\textit{Ca.} Methylomirabilis",
"g. \\textit{Methylocystis}",
"g. \\textit{Methyloterricola}",
"g. \\textit{Methylocaldum}",
"g. \\textit{Methylococcus}"))
CH4_plotted_taxa_names_tbl_edit
ampli_data_OM18_focus_samples_taxa_filtered_water_type <- ampli_data_OM18_focus_samples_taxa_filtered %>% mutate(water_type = case_when(
sampling_site == "CM2A" ~ "Ca$^{2+}$ - OH$^{-}$",
sampling_site == "NSHQ04" ~ "Ca$^{2+}$ - OH$^{-}$",
sampling_site == "NSHQ14" ~ "Ca$^{2+}$ - OH$^{-}$",
sampling_site == "WAB71" ~ "Ca$^{2+}$ - OH$^{-}$",
sampling_site == "WAB104" ~ "Mg$^{2+}$ - HCO$_{3}^{-}$",
sampling_site == "WAB105" ~ "Mg$^{2+}$ - HCO$_{3}^{-}$",
sampling_site == "WAB55" ~ "Mg$^{2+}$ - HCO$_{3}^{-}$",
sampling_site == "WAB188" ~ "gabbro",
sampling_site == "WAB103" ~ "gabbro",
))
ampli_data_OM18_focus_samples_taxa_filtered_water_type$water_type <- factor(ampli_data_OM18_focus_samples_taxa_filtered_water_type$water_type, levels = c(
"gabbro",
"Mg$^{2+}$ - HCO$_{3}^{-}$",
"Ca$^{2+}$ - OH$^{-}$"
))
ampli_data_OM18_focus_samples_taxa_filtered_water_type
ampli_data_OM18_focus_samples_taxa_filtered_water_type$sampling_site <- factor(ampli_data_OM18_focus_samples_taxa_filtered_water_type$sampling_site, levels = c(
"WAB103",
"WAB188",
"WAB104",
"WAB105",
"WAB55",
"NSHQ04",
"WAB71",
"CM2A",
"NSHQ14"
))
ampli_data_OM18_focus_samples_taxa_filtered_water_type_tax_fun <- ampli_data_OM18_focus_samples_taxa_filtered_water_type %>%
mutate(tax_fun = case_when(
taxonomy == "k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanobacterium; s__NA" ~ "Methanogenesis",
taxonomy == "k__Archaea; p__Thermoplasmatota; c__Thermoplasmata; o__Methanomassiliicoccales; f__NA; g__NA; s__NA" ~ "Methanogenesis",
taxonomy == "k__Archaea; p__Halobacterota; c__ANME-1; o__ANME-1b; f__NA; g__NA; s__NA" ~ "Methanotrophy\nusing sulfate",
taxonomy == "k__Bacteria; p__Methylomirabilota; c__Methylomirabilia; o__Methylomirabilales; f__Methylomirabilaceae; g__Candidatus_Methylomirabilis; s__NA" ~ "Methanotrophy\nusing nitrite",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; o__Rhizobiales; f__Beijerinckiaceae; g__Methylocystis; s__NA" ~ "Methanotrophy\nusing oxygen",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Methylococcales; f__Methylococcaceae; g__Methylocaldum; s__NA" ~ "Methanotrophy\nusing oxygen",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Methylococcales; f__Methylococcaceae; g__Methylococcus; s__NA" ~ "Methanotrophy\nusing oxygen",
taxonomy == "k__Bacteria; p__Proteobacteria; c__Gammaproteobacteria; o__Methylococcales; f__Methylococcaceae; g__Methyloterricola; s__NA" ~ "Methanotrophy\nusing oxygen"
))
ampli_data_OM18_focus_samples_taxa_filtered_water_type_tax_fun$tax_fun <- factor(ampli_data_OM18_focus_samples_taxa_filtered_water_type_tax_fun$tax_fun, levels = c(
"Methanotrophy\nusing oxygen",
"Methanotrophy\nusing nitrite",
"Methanotrophy\nusing sulfate",
"Methanogenesis"
))
ampli_data_OM18_focus_samples_taxa_filtered_water_type_tax_fun %>%
ampli_heat_map(x_sample_group_col = sampling_site, facet_grid_sample_group_col = water_type, facet_grid_taxa_group_col = tax_fun, taxa_selection_method = "custom_taxa_char_vector", custom_taxa_char_vector = CH4_taxa, plot_other_taxa_bin = FALSE, custom_taxa_names_tbl = CH4_plotted_taxa_names_tbl_edit, custom_taxa_names_col = tax_short, facet_labeller = latex_labeller, text_label_format = "normal", text_label_scalar = 100, text_label_decimal_places = 0, text_label_threshold = 0.01, text_label_threshold_round_priority = "round", text_label_zero = "n.r.", y_taxa_arrangement = "custom") +
# plot geometry
geom_text(parse = FALSE, size = 2.65) +
# plot styling
scale_fill_gradient(name = "Read relative abundance / [%]", low = "white", high = "red", labels = label_percent(accuracy = 1, suffix = "")) +
scale_x_discrete(name = "Well grouped by water type", expand = c(0,0)) +
scale_y_discrete(name = "Deepest taxonomic assignment grouped by\nmetabolic capability inferred from phylogeny", expand = c(0,0), labels = TeX) +
theme_bw(base_size = 9.35) +
theme(
strip.text.y = element_text(angle = 0, vjust = 0.3),
legend.position = "bottom",
panel.grid = element_blank()
)
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## metrics unknown for character 0xa
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## metrics unknown for character 0xa
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## metrics unknown for character 0xa
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## metrics unknown for character 0xa
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## metrics unknown for character 0xa
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## metrics unknown for character 0xa
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## metrics unknown for character 0xa
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## metrics unknown for character 0xa
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## metrics unknown for character 0xa
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## metrics unknown for character 0xa
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## metrics unknown for character 0xa
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## metrics unknown for character 0xa
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font metrics unknown for character 0xa
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font metrics unknown for character 0xa
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font metrics unknown for character 0xa
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font metrics unknown for character 0xa
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font metrics unknown for character 0xa
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font metrics unknown for character 0xa